package com.open.util;

import cn.hutool.core.io.FileUtil;

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.HashSet;
import java.util.Set;
import java.util.regex.Pattern;

/**
 * @author typsusan
 * <p>des</p>
 **/
public class KeyWordUtil {

    private static Set<String> selectedParagraphs = new HashSet<>();

    public static String findMostImportantParagraph(String document, String keyword) {
        String[] paragraphs = document.split("\r\n");
        String mostImportantParagraph = null;
        double highestImportance = 0.0;

        for (int i = 0; i < paragraphs.length; i++) {
            String paragraph = paragraphs[i];
            if (!isDuplicate(paragraph)) {
                double importance = calculateParagraphImportance(paragraph, keyword);
                if (importance > highestImportance) {
                    highestImportance = importance;
                    mostImportantParagraph = paragraph;
                }
            }
        }
        if (mostImportantParagraph != null) {
            selectedParagraphs.add(mostImportantParagraph);
        }
        return mostImportantParagraph;
    }

    public static double calculateParagraphImportance(String paragraph, String keyword) {
        int keywordCount = countKeywordOccurrences(paragraph, keyword);
        int totalWords = paragraph.split("\\s+").length;

        double keywordFrequency = (double) keywordCount / totalWords;

        // 这里可以根据需要对关键字频率进行权重调整

        return keywordFrequency;
    }

    public static int countKeywordOccurrences(String text, String keyword) {
        int count = 0;
        int index = text.indexOf(keyword);
        while (index != -1) {
            count++;
            index = text.indexOf(keyword, index + keyword.length());
        }
        return count;
    }

    public static boolean isDuplicate(String paragraph) {
        return selectedParagraphs.contains(paragraph);
    }

    public static String extractContextAfterKeywordWithTitles(String document, String keyword, int maxLines) {
        String[] paragraphs = document.split("\r\n");
        int keywordIndex = -1;

        for (int i = 0; i < paragraphs.length; i++) {
            if (paragraphs[i].contains(keyword)) {
                keywordIndex = i;
                break;
            }
        }

        if (keywordIndex != -1) {
            StringBuilder result = new StringBuilder();
            int linesCount = 0;

            for (int i = keywordIndex + 1; i < paragraphs.length; i++) {
                if (linesCount >= maxLines || isTitle(paragraphs[i])) {
                    break;
                }

                result.append(paragraphs[i]).append("\n");
                linesCount++;
            }

            return result.toString();
        } else {
            return "关键字未找到";
        }
    }

    public static boolean isTitle(String line) {
        // 根据实际情况判断是否为标题
        // 这里可以使用正则表达式或其他方法来判断
        return line.matches("^[\\d]+\\..*?((?=^[\\d]+\\.)|$)");
    }

    public static void main(String[] args) {
        String document = FileUtil.readUtf8String("C:\\Users\\typsusan\\Desktop\\test.txt");
        String keyword = "【强制】单行字符数限制不超过 120 个，超出需要换行，换行时遵循如下原则： ";

        String context = extractContextAfterKeywordWithTitles(document, keyword, 10);
        System.out.println(context);
    }

}
